libraries here
# Packages for reading the inputs and wrangling the data. The original line
# had the chunk marker and all three calls fused together, which does not parse.
library(readxl)
library(classdata)
library(tidyverse)
## -- Attaching packages --------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3 ## v tibble 2.1.3 v dplyr 0.8.3 ## v tidyr 1.0.0 v stringr 1.4.0 ## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------ tidyverse_conflicts() -- ## x dplyr::filter() masks stats::filter() ## x dplyr::lag() masks stats::lag()
# Plotting and string helpers. plotly is attached last, so its last_plot(),
# filter() and layout() mask the ggplot2/stats/graphics versions (see the
# attach messages below).
library(stringr)
library(ggplot2)
library(plotly)
## ## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2': ## ## last_plot
## The following object is masked from 'package:stats': ## ## filter
## The following object is masked from 'package:graphics': ## ## layout

import data here

# Raw inputs:
#   fight_songs  - one row per school fight song
#   ipeds        - institution data from the "Data" sheet of the IPEDS workbook
#   lookup_table - hand-made table; its `Column` field is used further down
#                  as a row index into `ipeds`
fight_songs <- read.csv("fight-songs.csv")
ipeds <- read_excel("IPEDS_data.xlsx", sheet = "Data")
lookup_table <- read.csv("lookup_table.csv")
cleaning data (VERSION 1)
1) loop through all values ipeds and fight_songs
2) add if similar name, otherwise print which names did not work
# Version 1 cleaning: partial-match each fight-song school name against the
# IPEDS institution names and keep the IPEDS row for every hit.
# NOTE(review): pmatch() takes the first/unique prefix match, so some hits are
# the wrong institution (see the printed output below, e.g. "Maryland").
i <- 1 # counter still referenced by the lookup-table step further down

schools <- as.character(fight_songs$school)

# One pmatch() call per school (rather than one vectorised call) so that each
# school is matched independently of the others, as in the original loop.
picked_rows <- vapply(
  schools,
  function(school) pmatch(school, ipeds$Name, duplicates.ok = FALSE),
  integer(1),
  USE.NAMES = FALSE
)
found <- !is.na(picked_rows)

# Report every successful match as "works:  <school> <IPEDS name>".
for (msg in paste("works: ", schools[found], ipeds$Name[picked_rows[found]])) {
  print(msg)
}
# Uncomment to list the schools that found no match:
# print(paste("doesn't work:", schools[!found]))

# Subset once instead of growing the data frame with rbind() inside a loop.
cleaned_ipeds <- ipeds[picked_rows[found], ]
## [1] "works: Baylor Baylor University" ## [1] "works: Iowa State Iowa State University" ## [1] "works: Kansas State Kansas State University" ## [1] "works: Oklahoma State Oklahoma State University-Main Campus" ## [1] "works: Texas Tech Texas Tech University" ## [1] "works: Maryland Maryland University of Integrative Health" ## [1] "works: Michigan State Michigan State University" ## [1] "works: Nebraska Nebraska Wesleyan University" ## [1] "works: Wisconsin Wisconsin Lutheran College" ## [1] "works: Arizona State Arizona State University-Tempe" ## [1] "works: Oregon State Oregon State University" ## [1] "works: Stanford Stanford University" ## [1] "works: Washington State Washington State University" ## [1] "works: Mississippi State Mississippi State University" ## [1] "works: South Carolina South Carolina State University" ## [1] "works: Vanderbilt Vanderbilt University" ## [1] "works: Boston College Boston College" ## [1] "works: Clemson Clemson University" ## [1] "works: Duke Duke University" ## [1] "works: Florida State Florida State University" ## [1] "works: Miami Miami University-Oxford" ## [1] "works: North Carolina State North Carolina State University at Raleigh" ## [1] "works: Pitt Pittsburg State University" ## [1] "works: Syracuse Syracuse University" ## [1] "works: Wake Forest Wake Forest University"
# Append the IPEDS rows listed in the lookup table: lookup_table$Column holds
# row positions in `ipeds`. A single vectorised subset replaces the
# row-by-row rbind() and is a no-op when the lookup table is empty
# (1:nrow() would have iterated over c(1, 0)).
cleaned_ipeds <- rbind(cleaned_ipeds, ipeds[lookup_table$Column, ])

cleaning the data (VERSION 2!): added a new column to fight_songs holding each school's IPEDS ID number. This allows for better, more thorough cleaning, because some school names were not matched correctly by pmatch.

# Version 2 cleaning: match schools to IPEDS rows by ID number instead of by
# name. The previous loop iterated over `id` but looked up the stale `name`
# variable, and referred to an "ID Number" column that exists in neither
# table (read.csv names it `ID.number`, IPEDS names it "ID number"), so it
# never matched anything.
fight_songs$ID.number <- as.double(fight_songs$ID.number)

id_rows <- match(fight_songs$ID.number, ipeds[["ID number"]])

# Report fight-song rows whose ID is missing from IPEDS.
if (anyNA(id_rows)) {
  print(paste("ERROR: no IPEDS row for", fight_songs$school[is.na(id_rows)]))
}

# Only add institutions that are not already in cleaned_ipeds, so the later
# joins on "ID number" do not produce duplicate rows.
id_rows <- unique(id_rows[!is.na(id_rows)])
is_new <- !(ipeds[["ID number"]][id_rows] %in% cleaned_ipeds[["ID number"]])
cleaned_ipeds <- rbind(cleaned_ipeds, ipeds[id_rows[is_new], ])
cleaned_ipeds

# Give the key the same name in both tables so they can be joined on it.
# Renaming by name rather than by position (28) survives column reordering.
names(fight_songs)[names(fight_songs) == "ID.number"] <- "ID number"

fight_songs
joining the data
# Drop the IPEDS `year` column so it cannot clash with the fight songs'
# `year` column, then attach fight-song data to each cleaned IPEDS row.
# The key is spelled out rather than left to a natural join.
cleaned_ipeds$year <- NULL
join_data <- left_join(cleaned_ipeds, fight_songs, by = "ID number")
## Joining, by = "ID number"

Jessie's part

# Keep every fight song and attach the matching IPEDS attributes (NA where no
# institution matched). The join key is explicit so that a future shared
# column name cannot silently become part of the key.
join_data2 <- left_join(fight_songs, cleaned_ipeds, by = "ID number")
colnames(join_data2)
##   [1] "school"                                                                                
##   [2] "conference"                                                                            
##   [3] "song_name"                                                                             
##   [4] "writers"                                                                               
##   [5] "year"                                                                                  
##   [6] "student_writer"                                                                        
##   [7] "official_song"                                                                         
##   [8] "contest"                                                                               
##   [9] "bpm"                                                                                   
##  [10] "sec_duration"                                                                          
##  [11] "fight"                                                                                 
##  [12] "number_fights"                                                                         
##  [13] "victory"                                                                               
##  [14] "win_won"                                                                               
##  [15] "victory_win_won"                                                                       
##  [16] "rah"                                                                                   
##  [17] "nonsense"                                                                              
##  [18] "colors"                                                                                
##  [19] "men"                                                                                   
##  [20] "opponents"                                                                             
##  [21] "spelling"                                                                              
##  [22] "trope_count"                                                                           
##  [23] "spotify_id"                                                                            
##  [24] "X2019_FB_Wins"                                                                         
##  [25] "X2019_FB_Losses"                                                                       
##  [26] "Niche_Athletic_Rank"                                                                   
##  [27] "Niche_Party_Rank"                                                                      
##  [28] "ID number"                                                                             
##  [29] "Name"                                                                                  
##  [30] "ZIP code"                                                                              
##  [31] "Highest degree offered"                                                                
##  [32] "County name"                                                                           
##  [33] "Longitude location of institution"                                                     
##  [34] "Latitude location of institution"                                                      
##  [35] "Religious affiliation"                                                                 
##  [36] "Offers Less than one year certificate"                                                 
##  [37] "Offers One but less than two years certificate"                                        
##  [38] "Offers Associate's degree"                                                             
##  [39] "Offers Two but less than 4 years certificate"                                          
##  [40] "Offers Bachelor's degree"                                                              
##  [41] "Offers Postbaccalaureate certificate"                                                  
##  [42] "Offers Master's degree"                                                                
##  [43] "Offers Post-master's certificate"                                                      
##  [44] "Offers Doctor's degree - research/scholarship"                                         
##  [45] "Offers Doctor's degree - professional practice"                                        
##  [46] "Offers Doctor's degree - other"                                                        
##  [47] "Offers Other degree"                                                                   
##  [48] "Applicants total"                                                                      
##  [49] "Admissions total"                                                                      
##  [50] "Enrolled total"                                                                        
##  [51] "Percent of freshmen submitting SAT scores"                                             
##  [52] "Percent of freshmen submitting ACT scores"                                             
##  [53] "SAT Critical Reading 25th percentile score"                                            
##  [54] "SAT Critical Reading 75th percentile score"                                            
##  [55] "SAT Math 25th percentile score"                                                        
##  [56] "SAT Math 75th percentile score"                                                        
##  [57] "SAT Writing 25th percentile score"                                                     
##  [58] "SAT Writing 75th percentile score"                                                     
##  [59] "ACT Composite 25th percentile score"                                                   
##  [60] "ACT Composite 75th percentile score"                                                   
##  [61] "Estimated enrollment, total"                                                           
##  [62] "Estimated enrollment, full time"                                                       
##  [63] "Estimated enrollment, part time"                                                       
##  [64] "Estimated undergraduate enrollment, total"                                             
##  [65] "Estimated undergraduate enrollment, full time"                                         
##  [66] "Estimated undergraduate enrollment, part time"                                         
##  [67] "Estimated freshman undergraduate enrollment, total"                                    
##  [68] "Estimated freshman enrollment, full time"                                              
##  [69] "Estimated freshman enrollment, part time"                                              
##  [70] "Estimated graduate enrollment, total"                                                  
##  [71] "Estimated graduate enrollment, full time"                                              
##  [72] "Estimated graduate enrollment, part time"                                              
##  [73] "Associate's degrees awarded"                                                           
##  [74] "Bachelor's degrees awarded"                                                            
##  [75] "Master's degrees awarded"                                                              
##  [76] "Doctor's degrese - research/scholarship awarded"                                       
##  [77] "Doctor's degrees - professional practice awarded"                                      
##  [78] "Doctor's degrees - other awarded"                                                      
##  [79] "Certificates of less than 1-year awarded"                                              
##  [80] "Certificates of 1 but less than 2-years awarded"                                       
##  [81] "Certificates of 2 but less than 4-years awarded"                                       
##  [82] "Postbaccalaureate certificates awarded"                                                
##  [83] "Post-master's certificates awarded"                                                    
##  [84] "Number of students receiving an Associate's degree"                                    
##  [85] "Number of students receiving a Bachelor's degree"                                      
##  [86] "Number of students receiving a Master's degree"                                        
##  [87] "Number of students receiving a Doctor's degree"                                        
##  [88] "Number of students receiving a certificate of less than 1-year"                        
##  [89] "Number of students receiving a certificate of 1 but less than 4-years"                 
##  [90] "Number of students receiving a Postbaccalaureate or Post-master's certificate"         
##  [91] "Percent admitted - total"                                                              
##  [92] "Admissions yield - total"                                                              
##  [93] "Tuition and fees, 2010-11"                                                             
##  [94] "Tuition and fees, 2011-12"                                                             
##  [95] "Tuition and fees, 2012-13"                                                             
##  [96] "Tuition and fees, 2013-14"                                                             
##  [97] "Total price for in-state students living on campus 2013-14"                            
##  [98] "Total price for out-of-state students living on campus 2013-14"                        
##  [99] "State abbreviation"                                                                    
## [100] "FIPS state code"                                                                       
## [101] "Geographic region"                                                                     
## [102] "Sector of institution"                                                                 
## [103] "Level of institution"                                                                  
## [104] "Control of institution"                                                                
## [105] "Historically Black College or University"                                              
## [106] "Tribal college"                                                                        
## [107] "Degree of urbanization (Urban-centric locale)"                                         
## [108] "Carnegie Classification 2010: Basic"                                                   
## [109] "Total  enrollment"                                                                     
## [110] "Full-time enrollment"                                                                  
## [111] "Part-time enrollment"                                                                  
## [112] "Undergraduate enrollment"                                                              
## [113] "Graduate enrollment"                                                                   
## [114] "Full-time undergraduate enrollment"                                                    
## [115] "Part-time undergraduate enrollment"                                                    
## [116] "Percent of total enrollment that are American Indian or Alaska Native"                 
## [117] "Percent of total enrollment that are Asian"                                            
## [118] "Percent of total enrollment that are Black or African American"                        
## [119] "Percent of total enrollment that are Hispanic/Latino"                                  
## [120] "Percent of total enrollment that are Native Hawaiian or Other Pacific Islander"        
## [121] "Percent of total enrollment that are White"                                            
## [122] "Percent of total enrollment that are two or more races"                                
## [123] "Percent of total enrollment that are Race/ethnicity unknown"                           
## [124] "Percent of total enrollment that are Nonresident Alien"                                
## [125] "Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander"           
## [126] "Percent of total enrollment that are women"                                            
## [127] "Percent of undergraduate enrollment that are American Indian or Alaska Native"         
## [128] "Percent of undergraduate enrollment that are Asian"                                    
## [129] "Percent of undergraduate enrollment that are Black or African American"                
## [130] "Percent of undergraduate enrollment that are Hispanic/Latino"                          
## [131] "Percent of undergraduate enrollment that are Native Hawaiian or Other Pacific Islander"
## [132] "Percent of undergraduate enrollment that are White"                                    
## [133] "Percent of undergraduate enrollment that are two or more races"                        
## [134] "Percent of undergraduate enrollment that are Race/ethnicity unknown"                   
## [135] "Percent of undergraduate enrollment that are Nonresident Alien"                        
## [136] "Percent of undergraduate enrollment that are Asian/Native Hawaiian/Pacific Islander"   
## [137] "Percent of undergraduate enrollment that are women"                                    
## [138] "Percent of graduate enrollment that are American Indian or Alaska Native"              
## [139] "Percent of graduate enrollment that are Asian"                                         
## [140] "Percent of graduate enrollment that are Black or African American"                     
## [141] "Percent of graduate enrollment that are Hispanic/Latino"                               
## [142] "Percent of graduate enrollment that are Native Hawaiian or Other Pacific Islander"     
## [143] "Percent of graduate enrollment that are White"                                         
## [144] "Percent of graduate enrollment that are two or more races"                             
## [145] "Percent of graduate enrollment that are Race/ethnicity unknown"                        
## [146] "Percent of graduate enrollment that are Nonresident Alien"                             
## [147] "Percent of graduate enrollment that are Asian/Native Hawaiian/Pacific Islander"        
## [148] "Percent of graduate enrollment that are women"                                         
## [149] "Number of first-time undergraduates - in-state"                                        
## [150] "Percent of first-time undergraduates - in-state"                                       
## [151] "Number of first-time undergraduates - out-of-state"                                    
## [152] "Percent of first-time undergraduates - out-of-state"                                   
## [153] "Number of first-time undergraduates - foreign countries"                               
## [154] "Percent of first-time undergraduates - foreign countries"                              
## [155] "Number of first-time undergraduates - residence unknown"                               
## [156] "Percent of first-time undergraduates - residence unknown"                              
## [157] "Graduation rate - Bachelor degree within 4 years, total"                               
## [158] "Graduation rate - Bachelor degree within 5 years, total"                               
## [159] "Graduation rate - Bachelor degree within 6 years, total"                               
## [160] "Percent of freshmen receiving any financial aid"                                       
## [161] "Percent of freshmen receiving federal, state, local or institutional grant aid"        
## [162] "Percent of freshmen  receiving federal grant aid"                                      
## [163] "Percent of freshmen receiving Pell grants"                                             
## [164] "Percent of freshmen receiving other federal grant aid"                                 
## [165] "Percent of freshmen receiving state/local grant aid"                                   
## [166] "Percent of freshmen receiving institutional grant aid"                                 
## [167] "Percent of freshmen receiving student loan aid"                                        
## [168] "Percent of freshmen receiving federal student loans"                                   
## [169] "Percent of freshmen receiving other loan aid"                                          
## [170] "Endowment assets (year end) per FTE enrollment (GASB)"                                 
## [171] "Endowment assets (year end) per FTE enrollment (FASB)"
# Interactive scatter of song length against tempo, coloured by conference.
# The reference lines mark the median tempo and median length. Columns are
# referenced by bare name inside aes() so they are looked up in the plotted
# data (and the legend is titled "conference" rather than
# "join_data2$conference").
speed_plot <- join_data2 %>%
  mutate(conference = as.factor(conference)) %>%
  ggplot(aes(
    x = sec_duration,
    y = bpm,
    color = conference,
    # Tooltip HTML for plotly; the School label's bold tag is now closed.
    text = paste0(
      "<b>Song Name: </b>", song_name, "<br>",
      "<b>School: </b>", school
    )
  )) +
  geom_point() +
  geom_hline(yintercept = median(join_data2$bpm, na.rm = TRUE)) +
  geom_vline(xintercept = median(join_data2$sec_duration, na.rm = TRUE)) +
  xlab("Song Length (Seconds)") +
  ylab("Song Speed (bpm)") +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank())

# Show only the custom text in the hover tooltip.
speed_plotly <- ggplotly(speed_plot, tooltip = "text")

speed_plotly
# Classify each song by tempo (cut at 140 bpm) and length (cut at 67 s).
# NOTE(review): 140 and 67 look like the medians drawn on the speed plot -
# confirm. Songs sitting exactly on a cut-off now count as "Fast" / "Long";
# previously any song at exactly 140 bpm or 67 s fell through to
# "Long & Fast" regardless of its other measurement.
join_data2 <- join_data2 %>%
  mutate(
    speed_cat = case_when(
      is.na(bpm) | is.na(sec_duration) ~ NA_character_,
      bpm < 140 & sec_duration < 67 ~ "Short & Slow",
      bpm < 140 ~ "Long & Slow",
      sec_duration < 67 ~ "Short & Fast",
      TRUE ~ "Long & Fast"
    )
  )

# State outlines for the maps below. The name `map` shadows purrr::map() as a
# variable, but function calls to map() still resolve to purrr.
map <- map_data("state")

# `year` may have been read as a factor; go through character so the
# conversion yields the printed year rather than the factor's level codes.
# Non-numeric entries become NA (hence the coercion warning).
join_data2$year <- as.numeric(as.character(join_data2$year))
## Warning: NAs introduced by coercion
# Draw one frame of the "when was each fight song written" map.
#
# songs  : data frame with a numeric `year`, `student_writer`, and the IPEDS
#          longitude/latitude columns.
# states : state outline data with `long`, `lat` and `group` columns, as
#          created above with map_data("state").
# upper  : schools whose song was written before this year are drawn as points.
# lower  : schools whose song was written in [lower, upper) also get a year
#          label. Ignored when `label` is FALSE.
# label  : FALSE draws the points only.
#
# Returns a ggplot object. Rows with a missing year are never drawn; rows with
# a missing location are dropped by ggplot with a warning.
plot_song_map <- function(songs, states, upper, lower = -Inf, label = TRUE) {
  # filter() conditions must be logical, so the year column itself is no
  # longer passed as a condition. The same factor levels are used on every
  # frame so that the colours stay consistent from one frame to the next.
  shown <- songs %>%
    dplyr::filter(year < upper) %>%
    mutate(
      student_writer = factor(student_writer, levels = c("Yes", "No", "Unknown"))
    )

  frame <- ggplot(states, aes(x = long, y = lat)) +
    geom_path(aes(group = group)) +
    geom_point(
      data = shown,
      aes(
        x = `Longitude location of institution`,
        y = `Latitude location of institution`,
        color = student_writer
      )
    )

  if (label) {
    # The lower bound is inclusive so that songs written exactly in a
    # boundary year (1900, 1910, ...) are labelled on one frame instead of
    # none. check_overlap is a geom_text() option that geom_label() ignores,
    # so it is not passed.
    frame <- frame +
      geom_label(
        data = dplyr::filter(shown, year >= lower),
        aes(
          x = `Longitude location of institution`,
          y = `Latitude location of institution`,
          label = year,
          color = student_writer
        ),
        hjust = 0,
        nudge_x = 0.3
      )
  }

  frame +
    scale_colour_brewer(palette = "Set1") +
    coord_map() +
    theme(
      panel.grid = element_blank(),
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks = element_blank()
    ) +
    ylab("") +
    xlab("")
}

# Each frame shows every song written before its upper break and labels the
# songs written since the previous break.
frame_breaks <- c(-Inf, 1900, 1910, 1915, 1920, 1930, 1940, 1950, 1960, 1970)

for (frame_idx in seq_along(frame_breaks)[-1]) {
  print(plot_song_map(
    join_data2, map,
    upper = frame_breaks[frame_idx],
    lower = frame_breaks[frame_idx - 1]
  ))
}

# Closing frame: all songs written before 1970, without labels.
print(plot_song_map(join_data2, map, upper = 1970, label = FALSE))
## Warning: Removed 6 rows containing missing values (geom_point).

# Tag each row as 'old' (year up to and including 1922) or 'new', and store
# student_writer as a factor ordered Yes / No / Unknown.
join_data2 <- mutate(
  join_data2,
  age = if_else(year <= 1922, 'old', 'new'),
  student_writer = factor(student_writer, levels = c("Yes", "No", "Unknown"))
)

# Stacked bar chart: number of rows per `age` category, split by
# `student_writer` (fill colours follow the factor order of student_writer).
# Removed from the original: `rm.na = TRUE`, which is not an argument of
# ggplot() and was silently ignored, and a group_by() that has no effect on
# what ggplot draws.
join_data2 %>%
  ggplot(aes(x = age, fill = student_writer)) +
  geom_bar() +
  scale_fill_manual(values = c("red", "blue", "white"))

# `year` by `speed_cat`, coloured by `student_writer`: raw points with
# boxplots drawn on top.
# The group_by() before ggplot() was dropped; ggplot derives its groups from
# the aesthetics, so the dplyr grouping never influenced the plot.
join_data2 %>%
  ggplot(aes(x = speed_cat, y = year, color = student_writer)) +
  geom_point() +
  geom_boxplot() +
  scale_colour_brewer(palette = "Set1") +
  xlab("Length & Speed")
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).
## Warning: Removed 5 rows containing missing values (geom_point).

# Scatter of Niche_Athletic_Rank against X2019_FB_Wins, coloured by
# number_fights treated as a discrete variable.
# The brewer "Set1" palette only provides 9 colours, fewer than the number of
# distinct number_fights values, so some levels were left without a colour.
# The discrete viridis scale has no such cap.
join_data2 %>%
  ggplot(
    aes(
      x = Niche_Athletic_Rank,
      y = X2019_FB_Wins,
      color = as.factor(number_fights)
    )
  ) +
  geom_point() +
  scale_colour_viridis_d()
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 3 rows containing missing values (geom_point).

# Scatter of tuition and fees (2013-14) against Niche_Athletic_Rank, coloured
# by speed_cat.
# The tuition column is referenced by its backticked name so it is read from
# the plot data (and the default axis title is the plain column name) instead
# of being pulled from the global data frame with `$`.
join_data2 %>%
  ggplot(
    aes(
      x = `Tuition and fees, 2013-14`,
      y = Niche_Athletic_Rank,
      color = speed_cat
    )
  ) +
  geom_point()
## Warning: Removed 6 rows containing missing values (geom_point).

# Scatter of Niche_Party_Rank against the 4-year bachelor graduation rate,
# coloured by the `nonsense` column.
# The graduation-rate column is referenced by its backticked name so it is
# read from the plot data rather than from the global data frame via `$`.
join_data2 %>%
  ggplot(
    aes(
      x = Niche_Party_Rank,
      y = `Graduation rate - Bachelor degree within 4 years, total`,
      color = nonsense
    )
  ) +
  geom_point()
## Warning: Removed 6 rows containing missing values (geom_point).

# Boxplot of the percentage of enrolled students who are women, split (and
# filled) by the `men` column.
# The enrollment column is referenced by its backticked name so it is read
# from the plot data rather than from the global data frame via `$`.
join_data2 %>%
  ggplot(
    aes(
      x = men,
      y = `Percent of total enrollment that are women`,
      fill = men
    )
  ) +
  geom_boxplot() +
  xlab("Does the song refer to either men or boys?") +
  ylab("Percentage of Students who are Female")
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).

# SAT Writing 75th percentile score for each school, split (and coloured) by
# the `spelling` column.
# The SAT column is referenced by its backticked name so it is read from the
# plot data rather than from the global data frame via `$`.
join_data2 %>%
  ggplot(
    aes(
      x = spelling,
      y = `SAT Writing 75th percentile score`,
      color = spelling
    )
  ) +
  geom_point(size = 3) +
  scale_colour_brewer(palette = "Set1") +
  xlab("Does the song spell out words?") +
  ylab("SAT Writing 75th Percentile Score")
## Warning: Removed 28 rows containing missing values (geom_point).

# SAT Writing 25th percentile score for each school, split (and coloured) by
# the `spelling` column.
# The SAT column is referenced by its backticked name so it is read from the
# plot data rather than from the global data frame via `$`.
join_data2 %>%
  ggplot(
    aes(
      x = spelling,
      y = `SAT Writing 25th percentile score`,
      color = spelling
    )
  ) +
  geom_point(size = 3) +
  scale_colour_brewer(palette = "Set1") +
  xlab("Does the song spell out words?") +
  ylab("SAT Writing 25th Percentile Score")
## Warning: Removed 28 rows containing missing values (geom_point).

# Athletic rank against party rank, coloured by speed_cat. The `text`
# aesthetic carries the school name so it can be shown as the hover tooltip
# once the plot is converted with ggplotly().
plot2 <- ggplot(
  group_by(join_data2, speed_cat),
  aes(
    x = Niche_Athletic_Rank,
    y = Niche_Party_Rank,
    color = speed_cat,
    text = school
  )
) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  labs(x = "Athletic Ranking", y = "Party Ranking")

# Interactive version; only the `text` aesthetic is shown on hover.
plotly2 <- ggplotly(plot2, tooltip = "text")

plotly2
# Long format: the columns from `victory` through `spelling` are stacked into
# a `lyric` (former column name) / `score` (its value) pair per row.
join_data_longer <- pivot_longer(
  join_data2,
  cols = victory:spelling,
  names_to = 'lyric',
  values_to = 'score'
)

# One panel per `lyric`: boxplots of X2019_FB_Wins for each `score` value.
# The group_by() before ggplot() was dropped; the panels come from
# facet_wrap(), so the dplyr grouping never influenced the plot.
join_data_longer %>%
  ggplot(aes(x = score, y = X2019_FB_Wins)) +
  geom_boxplot() +
  facet_wrap(~lyric) +
  ylab("2019 Football Wins") +
  xlab("")

# One panel per `lyric`: bar chart counting rows for each `score` value, with
# the two score values filled red and green.
# The group_by() before ggplot() was dropped; the panels come from
# facet_wrap(), so the dplyr grouping never influenced the plot.
join_data_longer %>%
  ggplot(aes(x = score, fill = score)) +
  geom_bar() +
  facet_wrap(~lyric) +
  xlab("") +
  scale_fill_manual(values = c("red", "green"))

Henry's part
Matthew's part
# Give the 2019 football record columns shorter names.
join_data <- join_data %>%
  rename(Wins = X2019_FB_Wins, Losses = X2019_FB_Losses)

# Athletic rank against party rank: point size scales with Wins, shape shows
# student_writer and colour shows conference. The vertical line at x = 50 and
# the horizontal line at y = 120 split the panel into four regions.
ggplot(join_data, aes(x = Niche_Athletic_Rank, y = Niche_Party_Rank)) +
  geom_point(aes(size = Wins / 3, shape = student_writer, color = conference)) +
  xlim(-10, 110) +
  ylim(-10, 270) +
  geom_vline(xintercept = 50) +
  geom_hline(yintercept = 120) +
  ggtitle("Athletic rank vs Party rank") +
  xlab("Athletic Rank") +
  ylab("Party Rank")
## Warning: Removed 9 rows containing missing values (geom_point).
## Among schools with party rank < 100, who wrote the fight song varies; among schools with party rank > 100, the songs were mostly written by non-students. ## The biggest party and athletic schools are from the Big Ten and SEC. The smallest ones are from the ACC and the Pac-12.

Ann's part (if doing more than cleaning)